{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "13faa11f",
   "metadata": {},
   "source": [
    "# 猎聘：文案职位抓取（广州）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "8dd92156",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "这是第1页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第9页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第10页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第11页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第12页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第13页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第14页，接下来将先等待6秒...然后以继续抓取\n",
      "这是第15页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第16页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第17页，接下来将先等待10秒...然后以继续抓取\n",
      "这是第18页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第19页，接下来将先等待8秒...然后以继续抓取\n",
      "这是第20页，接下来将先等待10秒...然后以继续抓取\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>job.link</th>\n",
       "      <th>...</th>\n",
       "      <th>recruiter.imId</th>\n",
       "      <th>recruiter.imUserType</th>\n",
       "      <th>recruiter.chatted</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>recruiter.recruiterId</th>\n",
       "      <th>recruiter.recruiterPhoto</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "      <th>job.requireEduLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"cf4ae15f241e152c46fdeacabc467e13\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/13381579/</td>\n",
       "      <td>13381579</td>\n",
       "      <td>映象传媒</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>63a1823d36ae467b266e060705u.jpg</td>\n",
       "      <td>广告/公关/会展</td>\n",
       "      <td>[本科, 文案策划]</td>\n",
       "      <td>https://www.liepin.com/lptjob/55158569</td>\n",
       "      <td>...</td>\n",
       "      <td>cf4ae15f241e152c46fdeacabc467e13</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>黄女士</td>\n",
       "      <td>HR</td>\n",
       "      <td>cfacd76e295ac665fa2207ecf977ce63</td>\n",
       "      <td>63a19218e6091653de2da74807u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"536fa1a763c729c601ab80e4a8b96567\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/12248921/</td>\n",
       "      <td>12248921</td>\n",
       "      <td>森德(广州)品牌管理有限公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>5ec7727eec4db462f82287b105u.png</td>\n",
       "      <td>广告/公关/会展</td>\n",
       "      <td>[大专, 文案策划, 网站策划, 社交媒体, 社交媒介, 自媒体, IT/互联网/软件]</td>\n",
       "      <td>https://www.liepin.com/lptjob/43465815</td>\n",
       "      <td>...</td>\n",
       "      <td>536fa1a763c729c601ab80e4a8b96567</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>梁女士</td>\n",
       "      <td></td>\n",
       "      <td>cdc49efb5e5f578d5eaf82a2ee9200ba</td>\n",
       "      <td>5f8f986779c7cc70efbf36c008u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"00d12d88671de70a02f523e94e784db2\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/1054367/</td>\n",
       "      <td>1054367</td>\n",
       "      <td>省广集团</td>\n",
       "      <td>2000-5000人</td>\n",
       "      <td>5d803842d0e79f08cffdfb2208u.png</td>\n",
       "      <td>广告/公关/会展</td>\n",
       "      <td>[本科, 文案策划, 品牌策划, 营销策划, IT/互联网/软件, 广告/传媒/文化]</td>\n",
       "      <td>https://www.liepin.com/lptjob/62515975</td>\n",
       "      <td>...</td>\n",
       "      <td>00d12d88671de70a02f523e94e784db2</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>齐先生</td>\n",
       "      <td>人力资源专员</td>\n",
       "      <td>211e4254c9a5370da289effc9b38a0d6</td>\n",
       "      <td>58535eb07032f7f5a88efa1906a.jpg</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"7c758dd4dcd686068d1ff84e0a6c6898\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/9717274/</td>\n",
       "      <td>9717274</td>\n",
       "      <td>汀兰影像</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>5dfc1c40cd52473b940e5f8f06u.jpg</td>\n",
       "      <td>广播/影视/录音</td>\n",
       "      <td>[本科, 新媒体]</td>\n",
       "      <td>https://www.liepin.com/lptjob/61973333</td>\n",
       "      <td>...</td>\n",
       "      <td>7c758dd4dcd686068d1ff84e0a6c6898</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>吴先生</td>\n",
       "      <td>创始人</td>\n",
       "      <td>9016b36d6820a18d0b87d4d8dbe256a4</td>\n",
       "      <td>5f8f986aea60860b75384fab08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"8c620620ff067764f0cf75fb14b27437\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/9912441/</td>\n",
       "      <td>9912441</td>\n",
       "      <td>广州安若希医药科技有限公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>医疗机构</td>\n",
       "      <td>[学历不限, 新媒体策划, 文案策划, 平面媒体]</td>\n",
       "      <td>https://www.liepin.com/lptjob/56466729</td>\n",
       "      <td>...</td>\n",
       "      <td>8c620620ff067764f0cf75fb14b27437</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>郑先生</td>\n",
       "      <td>人事经理</td>\n",
       "      <td>18c7e1c6c7383503f08e517cd455a765</td>\n",
       "      <td>5f8f9868f6d1ab58476f24a008u.jpg</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"52047593\",\"imId\":\"38758724486739335c...</td>\n",
       "      <td>https://www.liepin.com/company/6785693/</td>\n",
       "      <td>6785693</td>\n",
       "      <td>中海达</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>5bfe953474719dc6ed74134303a.png</td>\n",
       "      <td>电子/半导体/集成电路</td>\n",
       "      <td>[市场推广, 市场营销, 海外媒介, 社交媒体, 社交媒介]</td>\n",
       "      <td>https://www.liepin.com/job/1952047593.shtml</td>\n",
       "      <td>...</td>\n",
       "      <td>38758724486739335c4deb543ee50277</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>关女士</td>\n",
       "      <td>HRBP经理</td>\n",
       "      <td>db604716124edffb43acf384079c40d1</td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>已上市</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>统招本科</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"61054043\",\"imId\":\"f8b23476b6002c24cc...</td>\n",
       "      <td>https://www.liepin.com/company/13643205/</td>\n",
       "      <td>13643205</td>\n",
       "      <td>广州智选网络有限公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>计算机软件</td>\n",
       "      <td>[网络推广]</td>\n",
       "      <td>https://www.liepin.com/job/1961054043.shtml</td>\n",
       "      <td>...</td>\n",
       "      <td>f8b23476b6002c24cc96cca4578a1729</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>黎先生</td>\n",
       "      <td>游戏推广员</td>\n",
       "      <td>871738d8d20d28e0146fc64506305894</td>\n",
       "      <td>64e446eaaf493b045620af6c07u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>学历不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"60085123\",\"imId\":\"8fc883f8ebd1c926ba...</td>\n",
       "      <td>https://www.liepin.com/company/13611235/</td>\n",
       "      <td>13611235</td>\n",
       "      <td>广东省城际运输服务协会</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>非营利组织</td>\n",
       "      <td>[客户管理]</td>\n",
       "      <td>https://www.liepin.com/job/1960085123.shtml</td>\n",
       "      <td>...</td>\n",
       "      <td>8fc883f8ebd1c926ba33c5f018078816</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>吴女士</td>\n",
       "      <td>部长</td>\n",
       "      <td>417db8b0f08144522c905616972065d3</td>\n",
       "      <td>5f8f98648dbe6273dcf8515508u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>学历不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"50528717\",\"imId\":\"6e42f931ebd3bc0580...</td>\n",
       "      <td>https://www.liepin.com/company/9512580/</td>\n",
       "      <td>9512580</td>\n",
       "      <td>广州市布岛装饰材料有限公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>房地产开发经营</td>\n",
       "      <td>[软装工程, 室内装饰, 装饰装潢工程]</td>\n",
       "      <td>https://www.liepin.com/job/1950528717.shtml</td>\n",
       "      <td>...</td>\n",
       "      <td>6e42f931ebd3bc05801c17c79e240538</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>陈先生</td>\n",
       "      <td>人力资源部经理</td>\n",
       "      <td>02f8b1525a4b3674989ab81da6c787c5</td>\n",
       "      <td>5f8f98698dbe6273dcf8516008u.jpg</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>本科</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"53552645\",\"imId\":\"53a3986e27a13c75e3...</td>\n",
       "      <td>https://www.liepin.com/company/10024877/</td>\n",
       "      <td>10024877</td>\n",
       "      <td>广东甜秘密寝具有限公司</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>6492af4fc4b5802df380cf4303u.png</td>\n",
       "      <td>家具/家居</td>\n",
       "      <td>[网络推广, 内容运营]</td>\n",
       "      <td>https://www.liepin.com/job/1953552645.shtml</td>\n",
       "      <td>...</td>\n",
       "      <td>53a3986e27a13c75e3a362ce23913155</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>王女士</td>\n",
       "      <td>人力资源主管</td>\n",
       "      <td>0962010f13839d5aa4b57f8636786a3a</td>\n",
       "      <td>5ca47b93509919fa0ea15bee05u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>大专</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>800 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             dataInfo  \\\n",
       "0   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "1   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "2   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "3   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "4   %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "..                                                ...   \n",
       "35  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "36  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "37  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "38  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "39  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "\n",
       "                                           dataParams  \\\n",
       "0   {\"imId\":\"cf4ae15f241e152c46fdeacabc467e13\",\"im...   \n",
       "1   {\"imId\":\"536fa1a763c729c601ab80e4a8b96567\",\"im...   \n",
       "2   {\"imId\":\"00d12d88671de70a02f523e94e784db2\",\"im...   \n",
       "3   {\"imId\":\"7c758dd4dcd686068d1ff84e0a6c6898\",\"im...   \n",
       "4   {\"imId\":\"8c620620ff067764f0cf75fb14b27437\",\"im...   \n",
       "..                                                ...   \n",
       "35  {\"jobId\":\"52047593\",\"imId\":\"38758724486739335c...   \n",
       "36  {\"jobId\":\"61054043\",\"imId\":\"f8b23476b6002c24cc...   \n",
       "37  {\"jobId\":\"60085123\",\"imId\":\"8fc883f8ebd1c926ba...   \n",
       "38  {\"jobId\":\"50528717\",\"imId\":\"6e42f931ebd3bc0580...   \n",
       "39  {\"jobId\":\"53552645\",\"imId\":\"53a3986e27a13c75e3...   \n",
       "\n",
       "                                   comp.link  comp.compId   comp.compName  \\\n",
       "0   https://www.liepin.com/company/13381579/     13381579            映象传媒   \n",
       "1   https://www.liepin.com/company/12248921/     12248921  森德(广州)品牌管理有限公司   \n",
       "2    https://www.liepin.com/company/1054367/      1054367            省广集团   \n",
       "3    https://www.liepin.com/company/9717274/      9717274            汀兰影像   \n",
       "4    https://www.liepin.com/company/9912441/      9912441   广州安若希医药科技有限公司   \n",
       "..                                       ...          ...             ...   \n",
       "35   https://www.liepin.com/company/6785693/      6785693             中海达   \n",
       "36  https://www.liepin.com/company/13643205/     13643205      广州智选网络有限公司   \n",
       "37  https://www.liepin.com/company/13611235/     13611235     广东省城际运输服务协会   \n",
       "38   https://www.liepin.com/company/9512580/      9512580   广州市布岛装饰材料有限公司   \n",
       "39  https://www.liepin.com/company/10024877/     10024877     广东甜秘密寝具有限公司   \n",
       "\n",
       "   comp.compScale                    comp.compLogo comp.compIndustry  \\\n",
       "0          50-99人  63a1823d36ae467b266e060705u.jpg          广告/公关/会展   \n",
       "1          50-99人  5ec7727eec4db462f82287b105u.png          广告/公关/会展   \n",
       "2      2000-5000人  5d803842d0e79f08cffdfb2208u.png          广告/公关/会展   \n",
       "3           1-49人  5dfc1c40cd52473b940e5f8f06u.jpg          广播/影视/录音   \n",
       "4           1-49人  61b07937d0458d53c627567e02u.jpg              医疗机构   \n",
       "..            ...                              ...               ...   \n",
       "35     1000-2000人  5bfe953474719dc6ed74134303a.png       电子/半导体/集成电路   \n",
       "36         50-99人  61b07937d0458d53c627567e02u.jpg             计算机软件   \n",
       "37         50-99人  61b07937d0458d53c627567e02u.jpg             非营利组织   \n",
       "38          1-49人  61b07937d0458d53c627567e02u.jpg           房地产开发经营   \n",
       "39       500-999人  6492af4fc4b5802df380cf4303u.png             家具/家居   \n",
       "\n",
       "                                      job.labels  \\\n",
       "0                                     [本科, 文案策划]   \n",
       "1   [大专, 文案策划, 网站策划, 社交媒体, 社交媒介, 自媒体, IT/互联网/软件]   \n",
       "2    [本科, 文案策划, 品牌策划, 营销策划, IT/互联网/软件, 广告/传媒/文化]   \n",
       "3                                      [本科, 新媒体]   \n",
       "4                      [学历不限, 新媒体策划, 文案策划, 平面媒体]   \n",
       "..                                           ...   \n",
       "35                [市场推广, 市场营销, 海外媒介, 社交媒体, 社交媒介]   \n",
       "36                                        [网络推广]   \n",
       "37                                        [客户管理]   \n",
       "38                          [软装工程, 室内装饰, 装饰装潢工程]   \n",
       "39                                  [网络推广, 内容运营]   \n",
       "\n",
       "                                       job.link  ...  \\\n",
       "0        https://www.liepin.com/lptjob/55158569  ...   \n",
       "1        https://www.liepin.com/lptjob/43465815  ...   \n",
       "2        https://www.liepin.com/lptjob/62515975  ...   \n",
       "3        https://www.liepin.com/lptjob/61973333  ...   \n",
       "4        https://www.liepin.com/lptjob/56466729  ...   \n",
       "..                                          ...  ...   \n",
       "35  https://www.liepin.com/job/1952047593.shtml  ...   \n",
       "36  https://www.liepin.com/job/1961054043.shtml  ...   \n",
       "37  https://www.liepin.com/job/1960085123.shtml  ...   \n",
       "38  https://www.liepin.com/job/1950528717.shtml  ...   \n",
       "39  https://www.liepin.com/job/1953552645.shtml  ...   \n",
       "\n",
       "                      recruiter.imId recruiter.imUserType recruiter.chatted  \\\n",
       "0   cf4ae15f241e152c46fdeacabc467e13                    2             False   \n",
       "1   536fa1a763c729c601ab80e4a8b96567                    2             False   \n",
       "2   00d12d88671de70a02f523e94e784db2                    2             False   \n",
       "3   7c758dd4dcd686068d1ff84e0a6c6898                    2             False   \n",
       "4   8c620620ff067764f0cf75fb14b27437                    2             False   \n",
       "..                               ...                  ...               ...   \n",
       "35  38758724486739335c4deb543ee50277                    2             False   \n",
       "36  f8b23476b6002c24cc96cca4578a1729                    2             False   \n",
       "37  8fc883f8ebd1c926ba33c5f018078816                    2             False   \n",
       "38  6e42f931ebd3bc05801c17c79e240538                    2             False   \n",
       "39  53a3986e27a13c75e3a362ce23913155                    2             False   \n",
       "\n",
       "   recruiter.recruiterName recruiter.recruiterTitle  \\\n",
       "0                      黄女士                       HR   \n",
       "1                      梁女士                            \n",
       "2                      齐先生                   人力资源专员   \n",
       "3                      吴先生                      创始人   \n",
       "4                      郑先生                     人事经理   \n",
       "..                     ...                      ...   \n",
       "35                     关女士                   HRBP经理   \n",
       "36                     黎先生                    游戏推广员   \n",
       "37                     吴女士                       部长   \n",
       "38                     陈先生                  人力资源部经理   \n",
       "39                     王女士                   人力资源主管   \n",
       "\n",
       "               recruiter.recruiterId         recruiter.recruiterPhoto  \\\n",
       "0   cfacd76e295ac665fa2207ecf977ce63  63a19218e6091653de2da74807u.png   \n",
       "1   cdc49efb5e5f578d5eaf82a2ee9200ba  5f8f986779c7cc70efbf36c008u.jpg   \n",
       "2   211e4254c9a5370da289effc9b38a0d6  58535eb07032f7f5a88efa1906a.jpg   \n",
       "3   9016b36d6820a18d0b87d4d8dbe256a4  5f8f986aea60860b75384fab08u.jpg   \n",
       "4   18c7e1c6c7383503f08e517cd455a765  5f8f9868f6d1ab58476f24a008u.jpg   \n",
       "..                               ...                              ...   \n",
       "35  db604716124edffb43acf384079c40d1  5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "36  871738d8d20d28e0146fc64506305894  64e446eaaf493b045620af6c07u.png   \n",
       "37  417db8b0f08144522c905616972065d3  5f8f98648dbe6273dcf8515508u.jpg   \n",
       "38  02f8b1525a4b3674989ab81da6c787c5  5f8f98698dbe6273dcf8516008u.jpg   \n",
       "39  0962010f13839d5aa4b57f8636786a3a  5ca47b93509919fa0ea15bee05u.png   \n",
       "\n",
       "    comp.compStage job.requireWorkYears job.requireEduLevel  \n",
       "0              NaN                  NaN                 NaN  \n",
       "1              NaN                  NaN                 NaN  \n",
       "2           沪深A股上市                  NaN                 NaN  \n",
       "3              NaN                  NaN                 NaN  \n",
       "4            融资未公开                  NaN                 NaN  \n",
       "..             ...                  ...                 ...  \n",
       "35             已上市                 经验不限                统招本科  \n",
       "36             NaN                 经验不限                学历不限  \n",
       "37             NaN                 经验不限                学历不限  \n",
       "38           融资未公开                 经验不限                  本科  \n",
       "39             NaN                 经验不限                  大专  \n",
       "\n",
       "[800 rows x 32 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import crawl_liepin\n",
    "df = crawl_liepin.crawl(城市=\"广州\",关键词=\"文案\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5b9e4698",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `df` was crawled with 城市=\"广州\", so the export is named after that city\n",
    "# (the file was previously mislabelled 上海). Any later cell that reads this\n",
    "# file back must use the same name.\n",
    "df.to_excel('猎聘_文案_广州.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "767b6c09",
   "metadata": {},
   "source": [
    "# 读取 Excel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0f69a94c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "276ca5fe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>dataInfo</th>\n",
       "      <th>dataParams</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>comp.compId</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.compLogo</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>...</th>\n",
       "      <th>recruiter.imId</th>\n",
       "      <th>recruiter.imUserType</th>\n",
       "      <th>recruiter.chatted</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>recruiter.recruiterId</th>\n",
       "      <th>recruiter.recruiterPhoto</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "      <th>job.requireEduLevel</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"cf4ae15f241e152c46fdeacabc467e13\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/13381579/</td>\n",
       "      <td>13381579</td>\n",
       "      <td>映象传媒</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>63a1823d36ae467b266e060705u.jpg</td>\n",
       "      <td>广告/公关/会展</td>\n",
       "      <td>['本科', '文案策划']</td>\n",
       "      <td>...</td>\n",
       "      <td>cf4ae15f241e152c46fdeacabc467e13</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>黄女士</td>\n",
       "      <td>HR</td>\n",
       "      <td>cfacd76e295ac665fa2207ecf977ce63</td>\n",
       "      <td>63a19218e6091653de2da74807u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"536fa1a763c729c601ab80e4a8b96567\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/12248921/</td>\n",
       "      <td>12248921</td>\n",
       "      <td>森德(广州)品牌管理有限公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>5ec7727eec4db462f82287b105u.png</td>\n",
       "      <td>广告/公关/会展</td>\n",
       "      <td>['大专', '文案策划', '网站策划', '社交媒体', '社交媒介', '自媒体', ...</td>\n",
       "      <td>...</td>\n",
       "      <td>536fa1a763c729c601ab80e4a8b96567</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>梁女士</td>\n",
       "      <td>NaN</td>\n",
       "      <td>cdc49efb5e5f578d5eaf82a2ee9200ba</td>\n",
       "      <td>5f8f986779c7cc70efbf36c008u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"00d12d88671de70a02f523e94e784db2\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/1054367/</td>\n",
       "      <td>1054367</td>\n",
       "      <td>省广集团</td>\n",
       "      <td>2000-5000人</td>\n",
       "      <td>5d803842d0e79f08cffdfb2208u.png</td>\n",
       "      <td>广告/公关/会展</td>\n",
       "      <td>['本科', '文案策划', '品牌策划', '营销策划', 'IT/互联网/软件', '广...</td>\n",
       "      <td>...</td>\n",
       "      <td>00d12d88671de70a02f523e94e784db2</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>齐先生</td>\n",
       "      <td>人力资源专员</td>\n",
       "      <td>211e4254c9a5370da289effc9b38a0d6</td>\n",
       "      <td>58535eb07032f7f5a88efa1906a.jpg</td>\n",
       "      <td>沪深A股上市</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"7c758dd4dcd686068d1ff84e0a6c6898\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/9717274/</td>\n",
       "      <td>9717274</td>\n",
       "      <td>汀兰影像</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>5dfc1c40cd52473b940e5f8f06u.jpg</td>\n",
       "      <td>广播/影视/录音</td>\n",
       "      <td>['本科', '新媒体']</td>\n",
       "      <td>...</td>\n",
       "      <td>7c758dd4dcd686068d1ff84e0a6c6898</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>吴先生</td>\n",
       "      <td>创始人</td>\n",
       "      <td>9016b36d6820a18d0b87d4d8dbe256a4</td>\n",
       "      <td>5f8f986aea60860b75384fab08u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>%7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...</td>\n",
       "      <td>{\"imId\":\"8c620620ff067764f0cf75fb14b27437\",\"im...</td>\n",
       "      <td>https://www.liepin.com/company/9912441/</td>\n",
       "      <td>9912441</td>\n",
       "      <td>广州安若希医药科技有限公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>医疗机构</td>\n",
       "      <td>['学历不限', '新媒体策划', '文案策划', '平面媒体']</td>\n",
       "      <td>...</td>\n",
       "      <td>8c620620ff067764f0cf75fb14b27437</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>郑先生</td>\n",
       "      <td>人事经理</td>\n",
       "      <td>18c7e1c6c7383503f08e517cd455a765</td>\n",
       "      <td>5f8f9868f6d1ab58476f24a008u.jpg</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>795</th>\n",
       "      <td>35</td>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"52047593\",\"imId\":\"38758724486739335c...</td>\n",
       "      <td>https://www.liepin.com/company/6785693/</td>\n",
       "      <td>6785693</td>\n",
       "      <td>中海达</td>\n",
       "      <td>1000-2000人</td>\n",
       "      <td>5bfe953474719dc6ed74134303a.png</td>\n",
       "      <td>电子/半导体/集成电路</td>\n",
       "      <td>['市场推广', '市场营销', '海外媒介', '社交媒体', '社交媒介']</td>\n",
       "      <td>...</td>\n",
       "      <td>38758724486739335c4deb543ee50277</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>关女士</td>\n",
       "      <td>HRBP经理</td>\n",
       "      <td>db604716124edffb43acf384079c40d1</td>\n",
       "      <td>5f8f9866dfb13a7dee342f1808u.jpg</td>\n",
       "      <td>已上市</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>统招本科</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>796</th>\n",
       "      <td>36</td>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"61054043\",\"imId\":\"f8b23476b6002c24cc...</td>\n",
       "      <td>https://www.liepin.com/company/13643205/</td>\n",
       "      <td>13643205</td>\n",
       "      <td>广州智选网络有限公司</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>计算机软件</td>\n",
       "      <td>['网络推广']</td>\n",
       "      <td>...</td>\n",
       "      <td>f8b23476b6002c24cc96cca4578a1729</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>黎先生</td>\n",
       "      <td>游戏推广员</td>\n",
       "      <td>871738d8d20d28e0146fc64506305894</td>\n",
       "      <td>64e446eaaf493b045620af6c07u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>学历不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>797</th>\n",
       "      <td>37</td>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"60085123\",\"imId\":\"8fc883f8ebd1c926ba...</td>\n",
       "      <td>https://www.liepin.com/company/13611235/</td>\n",
       "      <td>13611235</td>\n",
       "      <td>广东省城际运输服务协会</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>非营利组织</td>\n",
       "      <td>['客户管理']</td>\n",
       "      <td>...</td>\n",
       "      <td>8fc883f8ebd1c926ba33c5f018078816</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>吴女士</td>\n",
       "      <td>部长</td>\n",
       "      <td>417db8b0f08144522c905616972065d3</td>\n",
       "      <td>5f8f98648dbe6273dcf8515508u.jpg</td>\n",
       "      <td>NaN</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>学历不限</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>798</th>\n",
       "      <td>38</td>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"50528717\",\"imId\":\"6e42f931ebd3bc0580...</td>\n",
       "      <td>https://www.liepin.com/company/9512580/</td>\n",
       "      <td>9512580</td>\n",
       "      <td>广州市布岛装饰材料有限公司</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>61b07937d0458d53c627567e02u.jpg</td>\n",
       "      <td>房地产开发经营</td>\n",
       "      <td>['软装工程', '室内装饰', '装饰装潢工程']</td>\n",
       "      <td>...</td>\n",
       "      <td>6e42f931ebd3bc05801c17c79e240538</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>陈先生</td>\n",
       "      <td>人力资源部经理</td>\n",
       "      <td>02f8b1525a4b3674989ab81da6c787c5</td>\n",
       "      <td>5f8f98698dbe6273dcf8516008u.jpg</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>本科</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>799</th>\n",
       "      <td>39</td>\n",
       "      <td>%7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...</td>\n",
       "      <td>{\"jobId\":\"53552645\",\"imId\":\"53a3986e27a13c75e3...</td>\n",
       "      <td>https://www.liepin.com/company/10024877/</td>\n",
       "      <td>10024877</td>\n",
       "      <td>广东甜秘密寝具有限公司</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>6492af4fc4b5802df380cf4303u.png</td>\n",
       "      <td>家具/家居</td>\n",
       "      <td>['网络推广', '内容运营']</td>\n",
       "      <td>...</td>\n",
       "      <td>53a3986e27a13c75e3a362ce23913155</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>王女士</td>\n",
       "      <td>人力资源主管</td>\n",
       "      <td>0962010f13839d5aa4b57f8636786a3a</td>\n",
       "      <td>5ca47b93509919fa0ea15bee05u.png</td>\n",
       "      <td>NaN</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>大专</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>800 rows × 33 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                           dataInfo  \\\n",
       "0             0  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "1             1  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "2             2  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "3             3  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "4             4  %7B%22sfrom%22%3A%22search_job_pc%22%2C%22ckId...   \n",
       "..          ...                                                ...   \n",
       "795          35  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "796          36  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "797          37  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "798          38  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "799          39  %7B%22ckId%22%3A%22bbj6whpz3502xykws0ti6rlvtwq...   \n",
       "\n",
       "                                            dataParams  \\\n",
       "0    {\"imId\":\"cf4ae15f241e152c46fdeacabc467e13\",\"im...   \n",
       "1    {\"imId\":\"536fa1a763c729c601ab80e4a8b96567\",\"im...   \n",
       "2    {\"imId\":\"00d12d88671de70a02f523e94e784db2\",\"im...   \n",
       "3    {\"imId\":\"7c758dd4dcd686068d1ff84e0a6c6898\",\"im...   \n",
       "4    {\"imId\":\"8c620620ff067764f0cf75fb14b27437\",\"im...   \n",
       "..                                                 ...   \n",
       "795  {\"jobId\":\"52047593\",\"imId\":\"38758724486739335c...   \n",
       "796  {\"jobId\":\"61054043\",\"imId\":\"f8b23476b6002c24cc...   \n",
       "797  {\"jobId\":\"60085123\",\"imId\":\"8fc883f8ebd1c926ba...   \n",
       "798  {\"jobId\":\"50528717\",\"imId\":\"6e42f931ebd3bc0580...   \n",
       "799  {\"jobId\":\"53552645\",\"imId\":\"53a3986e27a13c75e3...   \n",
       "\n",
       "                                    comp.link  comp.compId   comp.compName  \\\n",
       "0    https://www.liepin.com/company/13381579/     13381579            映象传媒   \n",
       "1    https://www.liepin.com/company/12248921/     12248921  森德(广州)品牌管理有限公司   \n",
       "2     https://www.liepin.com/company/1054367/      1054367            省广集团   \n",
       "3     https://www.liepin.com/company/9717274/      9717274            汀兰影像   \n",
       "4     https://www.liepin.com/company/9912441/      9912441   广州安若希医药科技有限公司   \n",
       "..                                        ...          ...             ...   \n",
       "795   https://www.liepin.com/company/6785693/      6785693             中海达   \n",
       "796  https://www.liepin.com/company/13643205/     13643205      广州智选网络有限公司   \n",
       "797  https://www.liepin.com/company/13611235/     13611235     广东省城际运输服务协会   \n",
       "798   https://www.liepin.com/company/9512580/      9512580   广州市布岛装饰材料有限公司   \n",
       "799  https://www.liepin.com/company/10024877/     10024877     广东甜秘密寝具有限公司   \n",
       "\n",
       "    comp.compScale                    comp.compLogo comp.compIndustry  \\\n",
       "0           50-99人  63a1823d36ae467b266e060705u.jpg          广告/公关/会展   \n",
       "1           50-99人  5ec7727eec4db462f82287b105u.png          广告/公关/会展   \n",
       "2       2000-5000人  5d803842d0e79f08cffdfb2208u.png          广告/公关/会展   \n",
       "3            1-49人  5dfc1c40cd52473b940e5f8f06u.jpg          广播/影视/录音   \n",
       "4            1-49人  61b07937d0458d53c627567e02u.jpg              医疗机构   \n",
       "..             ...                              ...               ...   \n",
       "795     1000-2000人  5bfe953474719dc6ed74134303a.png       电子/半导体/集成电路   \n",
       "796         50-99人  61b07937d0458d53c627567e02u.jpg             计算机软件   \n",
       "797         50-99人  61b07937d0458d53c627567e02u.jpg             非营利组织   \n",
       "798          1-49人  61b07937d0458d53c627567e02u.jpg           房地产开发经营   \n",
       "799       500-999人  6492af4fc4b5802df380cf4303u.png             家具/家居   \n",
       "\n",
       "                                            job.labels  ...  \\\n",
       "0                                       ['本科', '文案策划']  ...   \n",
       "1    ['大专', '文案策划', '网站策划', '社交媒体', '社交媒介', '自媒体', ...  ...   \n",
       "2    ['本科', '文案策划', '品牌策划', '营销策划', 'IT/互联网/软件', '广...  ...   \n",
       "3                                        ['本科', '新媒体']  ...   \n",
       "4                    ['学历不限', '新媒体策划', '文案策划', '平面媒体']  ...   \n",
       "..                                                 ...  ...   \n",
       "795           ['市场推广', '市场营销', '海外媒介', '社交媒体', '社交媒介']  ...   \n",
       "796                                           ['网络推广']  ...   \n",
       "797                                           ['客户管理']  ...   \n",
       "798                         ['软装工程', '室内装饰', '装饰装潢工程']  ...   \n",
       "799                                   ['网络推广', '内容运营']  ...   \n",
       "\n",
       "                       recruiter.imId recruiter.imUserType  recruiter.chatted  \\\n",
       "0    cf4ae15f241e152c46fdeacabc467e13                    2              False   \n",
       "1    536fa1a763c729c601ab80e4a8b96567                    2              False   \n",
       "2    00d12d88671de70a02f523e94e784db2                    2              False   \n",
       "3    7c758dd4dcd686068d1ff84e0a6c6898                    2              False   \n",
       "4    8c620620ff067764f0cf75fb14b27437                    2              False   \n",
       "..                                ...                  ...                ...   \n",
       "795  38758724486739335c4deb543ee50277                    2              False   \n",
       "796  f8b23476b6002c24cc96cca4578a1729                    2              False   \n",
       "797  8fc883f8ebd1c926ba33c5f018078816                    2              False   \n",
       "798  6e42f931ebd3bc05801c17c79e240538                    2              False   \n",
       "799  53a3986e27a13c75e3a362ce23913155                    2              False   \n",
       "\n",
       "     recruiter.recruiterName recruiter.recruiterTitle  \\\n",
       "0                        黄女士                       HR   \n",
       "1                        梁女士                      NaN   \n",
       "2                        齐先生                   人力资源专员   \n",
       "3                        吴先生                      创始人   \n",
       "4                        郑先生                     人事经理   \n",
       "..                       ...                      ...   \n",
       "795                      关女士                   HRBP经理   \n",
       "796                      黎先生                    游戏推广员   \n",
       "797                      吴女士                       部长   \n",
       "798                      陈先生                  人力资源部经理   \n",
       "799                      王女士                   人力资源主管   \n",
       "\n",
       "                recruiter.recruiterId         recruiter.recruiterPhoto  \\\n",
       "0    cfacd76e295ac665fa2207ecf977ce63  63a19218e6091653de2da74807u.png   \n",
       "1    cdc49efb5e5f578d5eaf82a2ee9200ba  5f8f986779c7cc70efbf36c008u.jpg   \n",
       "2    211e4254c9a5370da289effc9b38a0d6  58535eb07032f7f5a88efa1906a.jpg   \n",
       "3    9016b36d6820a18d0b87d4d8dbe256a4  5f8f986aea60860b75384fab08u.jpg   \n",
       "4    18c7e1c6c7383503f08e517cd455a765  5f8f9868f6d1ab58476f24a008u.jpg   \n",
       "..                                ...                              ...   \n",
       "795  db604716124edffb43acf384079c40d1  5f8f9866dfb13a7dee342f1808u.jpg   \n",
       "796  871738d8d20d28e0146fc64506305894  64e446eaaf493b045620af6c07u.png   \n",
       "797  417db8b0f08144522c905616972065d3  5f8f98648dbe6273dcf8515508u.jpg   \n",
       "798  02f8b1525a4b3674989ab81da6c787c5  5f8f98698dbe6273dcf8516008u.jpg   \n",
       "799  0962010f13839d5aa4b57f8636786a3a  5ca47b93509919fa0ea15bee05u.png   \n",
       "\n",
       "     comp.compStage  job.requireWorkYears job.requireEduLevel  \n",
       "0               NaN                   NaN                 NaN  \n",
       "1               NaN                   NaN                 NaN  \n",
       "2            沪深A股上市                   NaN                 NaN  \n",
       "3               NaN                   NaN                 NaN  \n",
       "4             融资未公开                   NaN                 NaN  \n",
       "..              ...                   ...                 ...  \n",
       "795             已上市                  经验不限                统招本科  \n",
       "796             NaN                  经验不限                学历不限  \n",
       "797             NaN                  经验不限                学历不限  \n",
       "798           融资未公开                  经验不限                  本科  \n",
       "799             NaN                  经验不限                  大专  \n",
       "\n",
       "[800 rows x 33 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the scraped listings workbook and show the resulting frame.\n",
    "df = pd.read_excel('猎聘.xlsx')\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "20a5df23",
   "metadata": {},
   "source": [
    "# 详情页面的信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "8e56bbb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import time\n",
    "\n",
    "def crawl_text_from_link(url, timeout=10, delay=2):\n",
    "    \"\"\"Download a page and return the text of all its paragraph (p) elements.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the page to fetch.\n",
    "        timeout: Seconds passed to requests.get as its timeout.\n",
    "        delay: Seconds to sleep after a successful request, which throttles\n",
    "            repeated calls.\n",
    "\n",
    "    Returns:\n",
    "        The text of every p element, in document order, joined by newlines\n",
    "        (an empty string when the page contains no p elements).\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the response status is 4xx or 5xx.\n",
    "        requests.RequestException: On connection errors or timeouts.\n",
    "    \"\"\"\n",
    "    headers = {\n",
    "        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'\n",
    "    }\n",
    "\n",
    "    response = requests.get(url, headers=headers, timeout=timeout)\n",
    "    response.raise_for_status()\n",
    "\n",
    "    # When the server declares no charset, requests decodes text responses as\n",
    "    # ISO-8859-1, which garbles Chinese pages; fall back to the encoding\n",
    "    # detected from the body in that case.\n",
    "    if 'charset' not in response.headers.get('Content-Type', '').lower():\n",
    "        response.encoding = response.apparent_encoding\n",
    "\n",
    "    time.sleep(delay)\n",
    "    soup = BeautifulSoup(response.text, 'html.parser')\n",
    "    return '\\n'.join(paragraph.get_text() for paragraph in soup.find_all('p'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e78ba49a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag(\\'js\\', new Date()); gtag(\\'config\\', \\'G-54YTJKWN86\\');\\n【广州招聘信息_广州人才网招聘信息】-猎聘\\n全国\\n首页\\n职位\\n校园\\n海归\\n社区\\n简历优化\\n猎聘APP\\n我是猎头\\n我是招聘方\\nNEW\\n登录/注册\\n热门职位\\n推荐职位\\n热门行业\\n周边城市\\n热门城市\\n其他公司\\n相关公司\\n手机版\\n广州招商运营人员招聘广州云平台存储专家招聘广州场外期权交易员招聘广州电力工程顾问招聘广州反洗钱调查专员招聘广州油化品研发工程师招聘广州汽车电子研究员招聘广州果园管理技术员招聘广州调味品研发总监招聘广州绩效奖金副经理招聘广州学习发展专业经理招聘广州电脑横机技术员招聘广州生物药注册经理招聘广州特定服务招聘广州军品销售招聘广州金融保险岗招聘广州高等数学导师招聘广州口译项目经理招聘广州抗体活性主管招聘广州课题立项申报员招聘广州锦鲤养殖技术员招聘广州淋浴房项目经理招聘广州3D可视化建模招聘广州离合器销售经理招聘广州留学申请主导师招聘广州实验室设计经理招聘广州总装主任工程师招聘广州原料药分析经理招聘广州家电维修服务运营岗招聘广州蓝牙音箱软件工程师招聘\\n广州采购经理/主管招聘广州环境/健康/安全管理招聘广州合伙人招聘广州保安经理招聘广州知识产权代理招聘广州汽车热管理工程师招聘广州网络/信息安全招聘广州列车长/司机招聘广州商业地产招商运营招聘广州医美咨询招聘广州二手车评估师招聘广州电梯工招聘广州其他项目管理职位招聘广州其他财务/审计/税务职位招聘广州广告/会展业务拓展招聘广州音视频/图形开发招聘广州活动执行招聘广州仓储管理招聘广州金融产品经理招聘广州银行零售业务招聘广州投资经理招聘广州校对/录入招聘广州平面设计招聘广州运维经理/主管招聘广州少儿编程教师招聘广州机械设计师招聘广州商品管理招聘广州电力工程师招聘广州充电桩工程师招聘广州广告客户执行AE招聘\\n广州汽车零部件及配件招聘广州航空/航天设备招聘广州农/林/牧/渔招聘广州房地产开发经营招聘广州科技金融招聘广州人力资源服务招聘广州日用杂品招聘广州石化招聘广州货运/物流/仓储招聘广州医疗器械招聘广州室内娱乐招聘广州仪器仪表招聘广州其他制造业招聘广州整车制造招聘广州互联网招聘广州家电招聘广州云计算/大数据招聘广州租赁业招聘广州文娱用品/器材招聘广州政府/公共事业招聘广州医疗机构招聘广州医药流通招聘广州学前教育招聘广州电子/半导体/集成电路招聘广州培训服务招聘广州房地产租赁/中介招聘广州其他行业招聘广州生活服务O2O招聘广州建筑/工程设计招聘广州银行招聘\\n深圳招聘惠州招聘梅州招聘汕尾招聘河源招聘阳江招聘清远招聘东莞招聘韶关招聘中山招聘珠海招聘潮州招聘揭阳招聘汕头招聘云浮招聘佛山招聘江门招聘湛江招聘茂名招聘肇庆招聘\\n北京招聘网上海招聘网广州招聘网厦门招聘网杭州招聘网郑州招聘网南京招聘网天津招聘网重庆招聘网成都招聘网苏州招聘网商丘招聘网大连招聘网济南招聘网宁波招聘网无锡招聘网青岛招聘网沈阳招聘网台州招聘网西安招聘网武汉招聘网\\n广东踔厉律师事务所广东光大国际旅行社有限公司广州保税区宝拓国际贸易有限公司广州科微软件有限公司广东高汇贸易有限公司NEC英富醍亚太贸易(上海)有限公司广州办事处广州松利建材有限公司广州市粤潮电力设备材料公司广州鸿仁堂保健食品有限公司人民北分店法国施耐德电气公司广州办事处欧科镜光学制造(大连)有限公司广州办事处深圳市宏程企业服务管理有限公司广州市海珠区赤岗嘉丽发廊广州市海珠区南石头伍记饭店广州市越秀区三江鞋材经营部广州市白云矿泉兴发保健品店广州市盛贤大沙头旧货交易市场健记精品店广州市白云矿泉大发副食品店广州市天河沙河新兴服饰店广州市海珠区江海辉达建材部朱村镇亭亭时装店广州市白云矿泉禾非服装店增城市新塘镇民利粮油店广州市大沙头通讯港二手交易市场华东电脑经营部广东鞋业城小天使鞋档广州市东山区好仕名雅洋服店广州市隆福汽配中心宏宇汽车音响商行广东唐姝达服饰商行新塘镇汇成电器五金综合经营部广州市海珠区海幢韵姿时装屋\\n粒上皇食品广州逐浪网络信息咨询有限公司广州共富科技有限公司广州市向美葵食品有限公司广州翌拓软件开发有限公司广州宝恒化工科技有限公司吉清工业设计(广州)有限公司广州樱奈儿化妆品有限公司广州鼎誉通信工程有限公司广州百良网络科技有限公司珠海飞扬科技有限公司广州铨聚臭氧科技有限公司广州美视创新科技有限公司广州市前沿环保设备有限公司广州六橙网
络科技有限公司广东盖尔伦医学发展有限公司广州三润国际货运代理有限公司鸿睿健康科技(广州)有限公司广州市新创举实业集团有限公司\\n广州招聘\\n当前位置：\\n广州招聘网 > 广州招聘信息\\nwindow.addEventListener(\\'message\\', function(event) {if (event.data && event.data.type === \\'footer-height\\') {document.querySelector(\\'#common-footer\\').setAttribute(\\'height\\', event.data.height + \\'px\\') }}, false) (function () { var imEntryScript = \\'<script crossOrigin=\"anonymous\" src=\"//concat.lietou-static.com/fe-imc-pc/v6/latest/entry.js?q=\\'.concat( Math.floor(Date.now() / 180000), \\'\"><\\\\/script>\\' ); document.write(imEntryScript); })(); { \"@context\": \"https://ziyuan.baidu.com/contexts/cambrian.jsonld\", \"appid\": \"1586030202028057\", \"@id\": \"https://www.liepin.com/city-gz/zhaopin/?scene=seo\", \"title\": \"【广州招聘信息_广州人才网招聘信息】-猎聘\", \"description\": \"猎聘广州招聘信息提供真实的广州人才招聘信息，广州招聘信息有超过10000多高薪人才招聘信息职位任你选，找广州招聘信息就来广州人才网招聘信息频道。\", \"upDate\": \"2023-12-21T08:00:00\", \"data\": { \"WebPage\": { \"headline\": \"【广州招聘信息_广州人才网招聘信息】-猎聘\", \"pcUrl\": \"https://www.liepin.com/city-gz/zhaopin/?scene=seo\", \"wapUrl\": \"https://m.liepin.com/city-gz/zhaopin/?scene=seo\" } } } var _hmt = _hmt || []; (function () { var hm = document.createElement(\"script\"); hm.src = \"//hm.baidu.com/hm.js?a2647413544f5a04f00da7eee0d5e200\"; var s = document.getElementsByTagName(\"script\")[0]; s.parentNode.insertBefore(hm, s); })(); (function () { var bp = document.createElement(\\'script\\'); var curProtocol = window.location.protocol.split(\\':\\')[0]; if (curProtocol === \\'https\\') { bp.src = \\'https://zz.bdstatic.com/linksubmit/push.js\\'; } else { bp.src = \\'http://push.zhanzhang.baidu.com/push.js\\'; } var s = document.getElementsByTagName(\"script\")[0]; s.parentNode.insertBefore(bp, s); })(); ;(function (n, e, r, t, a, o, s, i, c, l, f, m, p, u) { o = \\'precollect\\' s = \\'getAttribute\\' i = \\'addEventListener\\' c = \\'PerformanceObserver\\' l = function (e) { f = [].slice.call(arguments) f.push(Date.now(), location.href) ;(e == o ? 
l.p.a : l.q).push(f) } l.q = [] l.p = { a: [] } n[a] = l m = document.createElement(\\'script\\') m.src = r + \\'?aid=\\' + t + \\'&globalName=\\' + a m.crossorigin = \\'anonymous\\' e.getElementsByTagName(\\'head\\')[0].appendChild(m) if (i in n) { l.pcErr = function (e) { e = e || n.event p = e.target || e.srcElement if (p instanceof Element || p instanceof HTMLElement) { n[a](o, \\'st\\', { tagName: p.tagName, url: p[s](\\'href\\') || p[s](\\'src\\') }) } else { n[a](o, \\'err\\', e.error || e.message) } } l.pcRej = function (e) { e = e || n.event n[a](o, \\'err\\', e.reason || (e.detail && e.detail.reason)) } n[i](\\'error\\', l.pcErr, true) n[i](\\'unhandledrejection\\', l.pcRej, true) } if (\\'PerformanceLongTaskTiming\\' in n) { u = l.pp = { entries: [] } u.observer = new PerformanceObserver(function (e) { u.entries = u.entries.concat(e.getEntries()) }) u.observer.observe({ entryTypes: [\\'longtask\\', \\'largest-contentful-paint\\', \\'layout-shift\\'] }) } })( window, document, \\'https://concat.lietou-static.com/fe-lib-pc/v6/apmplus/1.7.0/browser.cn.js\\', 0, \\'apmPlus\\' ) window.apmPlus(\\'init\\', { aid: 460715, token: \\'26ca6df079bf44f09bd002af5fdb382c\\', plugins: { ajax: false, fetch: false, pageview: true, resource: false, performance: { longtask: true } }, sample: { rules: { pageview: { enable: true, sample_rate: 0.01 }, performance: { sample_rate: 0.0005 }, performance_timing: { sample_rate: 0.0005 }, performance_longtask: { sample_rate: 0.0005 } } } }) window.apmPlus(\\'start\\')'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "# requests-html session used for the search-page request below.\n",
    "session = HTMLSession()\n",
    "\n",
    "# Job-search results page: the `key` parameter is the URL-encoded keyword\n",
    "# 广告策划, requesting page 0 with 40 results per page.\n",
    "# A timeout keeps the cell from hanging on a stalled connection, and\n",
    "# raise_for_status() surfaces HTTP errors instead of parsing an error page.\n",
    "r = session.get(\n",
    "    'https://www.liepin.com/zhaopin/?city=050020&dq=050020&pubTime=&currentPage=0&pageSize=40&key=%E5%B9%BF%E5%91%8A%E7%AD%96%E5%88%92&suggestTag=&workYearCode=1&compId=&compName=&compTag=&industry=&salary=&jobKind=&compScale=&compKind=&compStage=&eduLevel=&otherCity=&sfrom=search_job_pc&ckId=e5vr1eqektbxh4oxmrx723hazhhxen98&scene=condition&skId=e5vr1eqektbxh4oxmrx723hazhhxen98&fkId=e5vr1eqektbxh4oxmrx723hazhhxen98&suggestId=',\n",
    "    timeout=10,\n",
    ")\n",
    "r.raise_for_status()\n",
    "r.html.text"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b3448c29",
   "metadata": {},
   "source": [
    "# 获取工作描述和技能要求文本"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "65657809",
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests_html import HTMLSession\n",
    "# Create a requests-html session. NOTE(review): an identical session is already\n",
    "# created in the earlier search-page cell -- confirm whether this cell is needed.\n",
    "session = HTMLSession()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad68a1c1",
   "metadata": {},
   "source": [
    "# 重命名列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c8b6796",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# Read the scraped listings workbook.\n",
    "file_path = '猎聘.xlsx'\n",
    "df = pd.read_excel(file_path)\n",
    "\n",
    "# Map the raw scraped field names to readable Chinese column headers.\n",
    "new_column_names = {\n",
    "    'Unnamed: 0': '序号',\n",
    "    'comp.link': '公司链接',\n",
    "    'comp.compLogo': '公司logo',\n",
    "    'comp.compId': '公司ID',\n",
    "    'job.jobId': '职位ID',\n",
    "    'job.labels': '职位标签',\n",
    "    'job.refreshTime': '职位更新时间',\n",
    "    'job.title': '职位',\n",
    "    'job.salary': '薪资',\n",
    "    'job.dq': '地区',\n",
    "    'job.topJob': '职位的重要程度',\n",
    "    'comp.compStage': '公司融资情况',\n",
    "    'comp.compName': '公司名称',\n",
    "    'comp.compIndustry': '行业',\n",
    "    'comp.compScale': '规模',\n",
    "    'job.requireEduLevel': '学历要求',\n",
    "    'job.requireWorkYears': '经验要求',\n",
    "    'job.campusJobKind': '实习or应届',\n",
    "    'recruiter.recruiterName': '联系人',\n",
    "    'recruiter.recruiterTitle': '联系人职位',\n",
    "}\n",
    "\n",
    "# rename() skips mapping keys that are not columns of the frame, so fields\n",
    "# missing from the workbook do not raise here.\n",
    "df = df.rename(columns=new_column_names)\n",
    "\n",
    "# Save the renamed data to a new Excel file (without the pandas index).\n",
    "output_file_path = '猎聘_文案_广州.xlsx'\n",
    "df.to_excel(output_file_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31d70f6d",
   "metadata": {},
   "source": [
    "# 结果分析的词云图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca314c19",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyecharts import options as opts\n",
    "from pyecharts.charts import WordCloud,Grid\n",
    "from pyecharts.globals import SymbolType\n",
    "\n",
    "# NOTE(review): `af` is not defined in this cell; it is assumed to be the listings\n",
    "# DataFrame (with a 'job.title' column) created in an earlier cell -- TODO confirm.\n",
    "\n",
    "# Reduce each title to its base name: keep only the text before the first\n",
    "# full-width '（', then before the first '/', then before the first '('.\n",
    "# value_counts() then gives title -> frequency, most frequent first.\n",
    "df_job_title = (\n",
    "    af['job.title']\n",
    "    .apply(lambda x: x.split('（')[0].split('/')[0].split('(')[0])\n",
    "    .value_counts()\n",
    ")\n",
    "\n",
    "# (word, count) pairs for the cloud; tolist() yields native Python values.\n",
    "# NOTE(review): the first (most frequent) title is skipped -- confirm this is\n",
    "# intentional and not an off-by-one.\n",
    "PM_title_words = list(zip(df_job_title.index.tolist(), df_job_title.tolist()))[1:]\n",
    "\n",
    "worlds = (\n",
    "    WordCloud()\n",
    "    .add(\"\", PM_title_words, word_size_range=[20, 100], shape=SymbolType.DIAMOND)\n",
    "    .set_global_opts(title_opts=opts.TitleOpts(title=\"岗位名称词云图\"))\n",
    ")\n",
    "worlds.render_notebook()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "459.273px",
    "left": "55px",
    "top": "110.318px",
    "width": "185.761px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
